
# Load the saved workspace used by this analysis.
# NOTE(review): presumably this provides the `simple` data frame used below —
# verify. The path has no "/" after "~"; confirm it is intentional and not a
# typo for "~/QTAdatafile.Rdata".
load("~QTAdatafile.Rdata")
# tidytext supplies unnest_tokens() and the stop_words table used below.
library(tidytext)
library(textdata)
# NOTE(review): the code below also calls functions that are not attached here
# (%>%, filter(), mutate(), count(), str_detect(), summ(), effect_plot(),
# cat_plot(), theme_ggdist(), plot_annotation(), ggsave(), ...) — presumably
# attached elsewhere before this section runs; verify.
# Split the Q21 responses into one row per token, then keep only tokens that
# contain at least one lowercase letter and do not appear in the stop_words
# table.
tidy_data <- simple %>%
  unnest_tokens(output = word, input = Q21) %>%
  filter(
    str_detect(word, "[a-z]"),
    !(word %in% stop_words$word)
  )

# Token frequencies, most common first.
count(tidy_data, word, sort = TRUE)

# Markup / HTML-entity fragments to discard. A token is dropped only when it
# is exactly equal to one of these strings.
remove_reg <- c(
  "https;", "t.co;", "&amp;", "&lt;", "&gt;", "<p>", "</p>", "&rsquo",
  "&lsquo;", "&#39;", "<strong>", "</strong>", "rsquo", "em", "ndash",
  "nbsp", "lsquo", "strong"
)
# The same fragments joined into one alternation pattern.
reg_match <- paste(remove_reg, collapse = "|")

tidy_data <- filter(tidy_data, !(word %in% remove_reg))

# Token frequencies after the clean-up, most common first.
count(tidy_data, word, sort = TRUE)
# Flag tokens that contain a sexuality-related term (1 = match, 0 = no match).
# unnest_tokens() lower-cases tokens by default, so the match is made
# case-insensitive; with a case-sensitive match the upper-case "LGBT"
# alternative could never hit a token.
# NOTE(review): alternatives match anywhere inside a token (e.g. "men" also
# hits "women"), and the multi-word / hyphenated alternatives presumably cannot
# match single-word tokens — confirm this is the intended coding rule.
tidy_data$gayword <- as.integer(grepl(
  "gay|homo|sexuality|LGBT|homosexual|two men|two|men|same sex|same-sex|same|orientation|gender|sexual|biological|sex",
  x = tidy_data$word,
  ignore.case = TRUE
))

# Flag tokens that contain a money / transaction / ethics-related term
# (1 = match, 0 = no match). Matching is by substring, as above.
tidy_data$moneyword <- as.integer(grepl(
  "money|pay|paid|welfare|commodity|payment|transaction|cost|exploit|exploitation|exploited|afford|ethics|ethical|vulnerable|inequality|resources|celebrity|famous",
  x = tidy_data$word
))
# Treat the treatment indicators and the support indicator as categorical.
# prestige is coerced as well: it is modelled and plotted below exactly like
# homo (discrete x axis with "0"/"1" labels), so it needs to be a factor too.
# as.factor() leaves a column that is already a factor unchanged.
tidy_data <- tidy_data %>%
  mutate(
    homo = as.factor(homo),
    support = as.factor(support),
    prestige = as.factor(prestige)
  )

# Rows whose treatment value is not 0.
tidy_noC <- tidy_data %>%
  filter(treatment != 0)

# Rows whose support value is 0.
tidy_no <- tidy_data %>%
  filter(support == 0)

### Models: effect of treatment on Pr(sexuality terms mentioned) ###
# Linear model of the sexuality-term flag on the homo treatment indicator.
mod1 <- lm(gayword ~ homo, data = tidy_data)
tidy_data$predictedX <- predict(mod1, newdata = tidy_data)

summ(mod1)

# Point-and-linerange plot of mod1 across the levels of homo, with hard-coded
# value labels placed next to each point.
gay1 <- effect_plot(
  mod1,
  pred = homo,
  cat.geom = "point",
  cat.interval.geom = "linerange",
  colors = "black",
  cat.pred.point.size = 3
) +
  scale_x_discrete(
    labels = c(
      "0" = "Hetero. couple\ntreatment",
      "1" = "Homo. couple\ntreatment"
    )
  ) +
  labs(y = "Pr(Sexuality terms mentioned)", x = "") +
  annotate(
    "text",
    x = 2.15, y = 0.065, label = ".065",
    size = 3, color = "black", fontface = 2
  ) +
  annotate(
    "text",
    x = 1.1, y = 0.03, label = ".03",
    size = 3, color = "black", fontface = 2
  ) +
  ylim(0, 0.13) +
  theme_ggdist()


# Same outcome, with the homo treatment interacted with support.
mod1b <- lm(gayword ~ homo * support, data = tidy_data)
summ(mod1b)
tidy_data$predictedX2 <- predict(mod1b, newdata = tidy_data)

# Point-and-linerange plot of mod1b by homo, split by support. The y axis
# decorations and the legend are hidden; text annotations label the points and
# the two support groups instead.
# NOTE(review): `colors` is not defined in this section — presumably a palette
# vector created elsewhere; verify.
gay2 <- cat_plot(
  mod1b,
  pred = homo,
  modx = support,
  interval = TRUE,
  colors = colors,
  geom = "point",
  interval.geom = "linerange"
) +
  ylim(0, 0.13) +
  labs(y = "Pr(Sexuality terms mentioned)", x = "") +
  scale_x_discrete(
    labels = c(
      "0" = "Hetero. couple\ntreatment",
      "1" = "Homo. couple\ntreatment"
    )
  ) +
  theme_ggdist() +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  # Value labels.
  annotate(
    "text",
    x = 0.92, y = 0.045, label = ".045",
    size = 3, color = "#FCA636FF", fontface = 2
  ) +
  annotate(
    "text",
    x = 1.92, y = 0.097, label = ".097",
    size = 3, color = "#FCA636FF", fontface = 2
  ) +
  annotate(
    "text",
    x = 1.35, y = 0.026, label = ".026",
    size = 3, color = "#C5407EFF", fontface = 2
  ) +
  annotate(
    "text",
    x = 2.35, y = 0.053, label = ".053",
    size = 3, color = "#C5407EFF", fontface = 2
  ) +
  # In-plot group labels (used in place of the hidden legend).
  annotate(
    "text",
    x = 1, y = 0.1, label = "Supports surrogacy",
    size = 4, color = "#C5407EFF", fontface = 2
  ) +
  annotate(
    "text",
    x = 1, y = 0.11, label = "Opposes surrogacy",
    size = 4, color = "#FCA636FF", fontface = 2
  )

# Combine the two sexuality-term plots under one title; the trailing `&`
# applies font face 2 to the text of every panel.
(
  gay1 + gay2 +
    plot_annotation(
      title = "Quantitative text analysis of open-ended survey responses"
    )
) &
  theme(text = element_text(face = 2))


# Linear model of the transactional-term flag on the prestige treatment
# indicator, with fitted values stored back on the data.
mod2 <- lm(moneyword ~ prestige, data = tidy_data)
tidy_data$predictedX2a <- predict(mod2, newdata = tidy_data)
summ(mod2)

# Same outcome, with prestige interacted with support.
mod2b <- lm(moneyword ~ prestige * support, data = tidy_data)
summ(mod2b)
tidy_data$predictedX2b <- predict(mod2b, newdata = tidy_data)


# Point-and-linerange plot of mod2 across the levels of prestige, with
# hard-coded value labels placed next to each point.
# The two value labels were previously swapped relative to their y positions
# (y = .044 was labelled ".045" and y = .045 was labelled ".044"); each label
# now matches the height it is drawn at, as every other annotation in this
# script does.
# NOTE(review): confirm the two values against summ(mod2).
money1 <- effect_plot(
  mod2,
  pred = prestige,
  cat.geom = "point",
  cat.interval.geom = "linerange",
  colors = "black",
  cat.pred.point.size = 3
) +
  labs(y = "Pr(Transactional terms mentioned)", x = "") +
  scale_x_discrete(
    labels = c(
      "0" = "Ordinary couple\ntreatment",
      "1" = "Celebrity couple\ntreatment"
    )
  ) +
  annotate(
    geom = "text", x = 2.15, y = .044, size = 3,
    color = "black", fontface = 2,
    label = ".044"
  ) +
  annotate(
    geom = "text", x = 1.11, y = .045, size = 3,
    color = "black", fontface = 2,
    label = ".045"
  ) +
  ylim(0, .13) +
  theme_ggdist()

# Point-and-linerange plot of mod2b by prestige, split by support. The y axis
# decorations and the legend are hidden; text annotations label the two
# support groups and the points instead.
# NOTE(review): `colors` is not defined in this section — presumably a palette
# vector created elsewhere; verify.
money2 <- cat_plot(
  mod2b,
  pred = prestige,
  modx = support,
  interval = TRUE,
  colors = colors,
  geom = "point",
  interval.geom = "linerange"
) +
  ylim(0, 0.13) +
  labs(y = "Pr(Transactional terms mentioned)", x = "") +
  scale_x_discrete(
    labels = c(
      "0" = "Ordinary couple\ntreatment",
      "1" = "Celebrity couple\ntreatment"
    )
  ) +
  theme_ggdist() +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank()
  ) +
  # In-plot group labels (used in place of the hidden legend).
  annotate(
    "text",
    x = 2, y = 0.11, label = "Supports surrogacy",
    size = 4, color = "#C5407EFF", fontface = 2
  ) +
  annotate(
    "text",
    x = 2, y = 0.12, label = "Opposes surrogacy",
    size = 4, color = "#FCA636FF", fontface = 2
  ) +
  # Value labels.
  annotate(
    "text",
    x = 0.92, y = 0.094, label = ".094",
    size = 3, color = "#FCA636FF", fontface = 2
  ) +
  annotate(
    "text",
    x = 1.92, y = 0.081, label = ".081",
    size = 3, color = "#FCA636FF", fontface = 2
  ) +
  annotate(
    "text",
    x = 1.35, y = 0.031, label = ".031",
    size = 3, color = "#C5407EFF", fontface = 2
  ) +
  annotate(
    "text",
    x = 2.35, y = 0.03, label = ".03",
    size = 3, color = "#C5407EFF", fontface = 2
  )

# Show the two transactional-term plots side by side.
money1 + money2


# Assemble the 2 x 2 figure: sexuality-term plots on the top row,
# transactional-term plots on the bottom row, one shared title, and font
# face 2 applied to the text of every panel.
figure5 <- (
  (gay1 + gay2) / (money1 + money2) +
    plot_annotation(
      title = "Quantitative text analysis of open-ended survey responses"
    )
) &
  theme(text = element_text(face = 2))

# Display the figure.
figure5

# Pass the figure explicitly instead of relying on ggsave()'s default of the
# last plot displayed, which is stale or unset when the script is run through
# source() or Rscript (top-level plots are not auto-printed there).
ggsave(
  "Figure5.tiff",
  plot = figure5,
  dpi = 300, height = 18, width = 18, units = "cm"
)
